#!/usr/bin/env python
#coding=utf-8
#@Time : 2017/11/22 15:29
#@Author : ChenMei

import requests
from bs4 import BeautifulSoup
import gzip
from StringIO import StringIO
import urllib2
import datetime
import time
import sys
import re
import os

# reload(sys)
# sys.setdefaultencoding('utf-8')
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
#     'Cookie': 'tt_webid=6487802632568473101',
# }
# para = "guoji"
# '''
# Test
# '''
# for i in range(20):
#     if i == 0:
#         url = "http://temp.163.com/special/00804KVA/cm_" + para + ".js?callback=data_callback"
#     elif i < 10:
#         url = "http://temp.163.com/special/00804KVA/cm_" + para + "_0" + str(i) + ".js?callback=data_callback"
#     else:
#         url = "http://temp.163.com/special/00804KVA/cm_" + para + "_" + str(i) + ".js?callback=data_callback"
#
#     req = requests.get(url, headers)
#     pattern = re.compile(r'"docurl":"(.*?)"')
#     newsurls = pattern.findall(req.text)
#     for urls in newsurls:
#         print urls

# Script body: fetch the people.com.cn news index feed and print the URL of
# every article entry found in the response.

# Headers sent with the request: a desktop Firefox User-Agent string and a
# fixed tt_webid cookie.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
    'Cookie': 'tt_webid=6487802632568473101',
}
# Use the current system timestamp (whole seconds) as the "_" query parameter
# so that each run asks for the latest data.
timepara = str(int(time.time()))
url = "http://news.people.com.cn/210801/211150/index.js?_=" + timepara
# Single-argument print(...) produces the same output under Python 2 and 3.
print(url)
# requests applies no timeout by default; bound the wait (10 s is an arbitrary
# but generous limit) so a stalled server cannot hang the script forever.
req = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx response instead of silently scanning an error
# page and printing nothing.
req.raise_for_status()
# Each match is a (title, url, date) tuple, captured in that order from
# adjacent "title"/"url"/"date" fields in the response text.
pattern = re.compile(r'"title":"(.*?)","url":"(.*?)","date":"(.*?)"')
pats = pattern.findall(req.text)
for p in pats:
    # Index 1 is the "url" capture group.
    print(p[1])



